#
# Copyright (c) 2021, 2023 supercell
#
# SPDX-License-Identifier: BSD-3-Clause
#
require "html"

module Luce
  # Maintains the internal state needed to parse a series of lines into
  # blocks of Markdown suitable for further inline parsing.
  class BlockParser
    # The lines this parser is working through.
    getter lines : Array(Line)

    # The Markdown document this parser is parsing
    getter document : Document

    # The enabled block syntaxes
    #
    # To turn a series of lines into blocks, each of these will be
    # tried in turn. Order matters here.
    getter block_syntaxes = [] of BlockSyntax

    # Index of the current line
    @pos : Int32 = 0

    # Starting line of the last unconsumed content.
    @start : Int32 = 0

    # The lines from `@start` to `@pos` (inclusive).
    #
    # This works as a buffer for some blocks. For example, when
    # `ParagraphSyntax` parsing is interrupted by `SetextHeaderSyntax`
    # (the structure turns out to be a setext heading rather than a
    # paragraph), `ParagraphSyntax#parse` does not have to retreat the
    # reading position; it only needs to return `nil`, and
    # `SetextHeaderSyntax` will pick up the lines in `lines_to_consume`.
    def lines_to_consume : Array(Line)
      @lines[@start..@pos]
    end

    # Whether the parser has encountered a blank line between two
    # block-level elements.
    @[Deprecated("Use `#encountered_blank_line?` instead. Remove at version 1.0.")]
    def encountered_blank_line : Bool
      encountered_blank_line?
    end

    # Whether the parser has encountered a blank line between two
    # block-level elements.
    property? encountered_blank_line : Bool = false

    # The collection of built-in block parsers
    getter standard_block_syntaxes = [
      Luce::EmptyBlockSyntax.new,
      Luce::HTMLBlockSyntax.new,
      Luce::SetextHeaderSyntax.new,
      Luce::HeaderSyntax.new,
      Luce::CodeBlockSyntax.new,
      Luce::BlockquoteSyntax.new,
      Luce::HorizontalRuleSyntax.new,
      Luce::UnorderedListSyntax.new,
      Luce::OrderedListSyntax.new,
      Luce::LinkReferenceDefinitionSyntax.new,
      Luce::ParagraphSyntax.new,
    ]

    # The parent `BlockSyntax` when it is running inside a nested syntax.
    getter parent_syntax : BlockSyntax?

    # Whether the `SetextHeaderSyntax` is disabled temporarily.
    #
    # Set from the *disable_setext_heading* argument of `#parse_lines`.
    getter? setext_heading_disabled : Bool = false

    # The `BlockSyntax` which is running now.
    # The value is `nil` until we found the first matched `BlockSyntax`.
    getter current_syntax : BlockSyntax? = nil

    # The `BlockSyntax` which is running before the `current_syntax`.
    getter previous_syntax : BlockSyntax? = nil

    # Creates a parser over *lines* for *document*.
    #
    # The document's own block syntaxes are registered first so they are
    # tried before anything else. After them come either the built-in
    # syntaxes (when `document.with_default_block_syntaxes?` is true) or a
    # single `DummyBlockSyntax`.
    def initialize(@lines, @document)
      block_syntaxes.concat document.block_syntaxes

      if document.with_default_block_syntaxes?
        block_syntaxes.concat standard_block_syntaxes
      else
        block_syntaxes << DummyBlockSyntax.new
      end
    end

    # Return the current line
    #
    # Raises `IndexError` when there is no current line (i.e. `done?`).
    def current : Line
      @lines[@pos]
    end

    # Return the line after the current one or `nil` if there is none.
    def next : Line?
      # Don't read past the end
      return nil if @pos >= @lines.size - 1
      @lines[@pos + 1]
    end

    # Return the line that is *lines_ahead* lines ahead of the current
    # one, or `nil` if there is none.
    #
    # Note that `peek(0)` is equivalent to `current`, and `peek(1)` is
    # equivalent to `next`.
    #
    # Raises `ArgumentError` if *lines_ahead* is negative.
    def peek(lines_ahead : Int32) : Line?
      if lines_ahead < 0
        raise ArgumentError.new("Invalid lines_ahead #{lines_ahead}; must be >= 0.")
      end
      # Don't read past the end.
      return nil if @pos >= (@lines.size - lines_ahead)
      @lines[@pos + lines_ahead]
    end

    # Advance the reading position by one line
    def advance : Nil
      @pos += 1
    end

    # Retreats the reading position by one line
    def retreat : Nil
      @pos -= 1
    end

    # Retreats the reading position by *count* lines.
    def retreat_by(count : Int32) : Nil
      @pos -= count
    end

    # Whether the reading position has moved past the last line.
    def done? : Bool
      @pos >= @lines.size
    end

    # Return if the current line matches the given *regex* or not.
    #
    # Always `false` once the parser is `done?`.
    def matches?(regex : Regex) : Bool
      return false if done?
      regex.matches?(current.content)
    end

    # Return if the next line matches the given *regex* or not.
    #
    # Always `false` when there is no next line.
    def matches_next?(regex : Regex) : Bool
      # Look the next line up once and let the compiler narrow the type,
      # instead of calling `self.next` twice and forcing it with `not_nil!`.
      following = self.next
      return false if following.nil?
      regex.matches?(following.content)
    end

    # Parses the lines from the current position to the end into block
    # nodes and returns them in document order.
    #
    # *parent_syntax* is stored as `#parent_syntax` and
    # *disable_setext_heading* as `#setext_heading_disabled?` before parsing
    # starts. For every line position the first syntax in `block_syntaxes`
    # whose `can_parse?` accepts the parser is asked to `parse`; a non-`nil`
    # result is appended to the returned array.
    #
    # Raises if the reading position fails to advance for more than two
    # consecutive iterations.
    def parse_lines(parent_syntax : BlockSyntax? = nil, disable_setext_heading : Bool = false) : Array(Node)
      @parent_syntax = parent_syntax
      @setext_heading_disabled = disable_setext_heading

      blocks = [] of Node

      # If the @pos does not change before and after `parse()`, never try to
      # parse the line at @pos with the same syntax again.
      # For example, the `TableSyntax` might not advance the @pos in `parse`
      # method, because the header row does not match the delimiter row in the
      # number of cells, which makes the table like structure not be recognized.
      never_match : BlockSyntax? = nil

      iterations_without_progress = 0
      until done?
        position_before = @pos
        block_syntaxes.each do |syntax|
          next if never_match == syntax

          if syntax.can_parse? self
            @previous_syntax = @current_syntax
            @current_syntax = syntax
            block = syntax.parse self
            blocks << block unless block.nil?
            # Remember a syntax that matched but did not move the position so
            # it is skipped on the next pass over the same line.
            never_match = @pos != position_before ? nil : syntax

            # Everything up to the current position counts as consumed once a
            # node was produced. These two syntaxes also move the start of
            # the unconsumed content when they return `nil`.
            if !block.nil? ||
               syntax.is_a? EmptyBlockSyntax ||
               syntax.is_a? LinkReferenceDefinitionSyntax
              @start = @pos
            end

            # Only the first matching syntax handles a given position.
            break
          end
        end
        # Count the number of iterations without progress.
        # This ensures that we don't have an infinite loop. And if we have an
        # infinite loop, it's easier to gracefully recover from an error, than
        # it is to discover and kill a process that's stuck in an infinite loop.
        # Technically, it should be perfectly safe to remove this check, but as
        # it's possible to inject custom BlockSyntax implementations and
        # combine existing ones, it is hard to promise that no combination will
        # trigger an infinite loop.
        if position_before == @pos
          iterations_without_progress += 1
          # If this happens, we throw an error to avoid having the parser
          # running an infinite loop. An error is easier to handle.
          # If you see this error in production, please file a bug!
          raise "BlockParser#parse_lines is not advancing!" if iterations_without_progress > 2
        else
          iterations_without_progress = 0
        end
      end

      blocks
    end
  end
end
